Skip to contents

Overview

The goal of Omix is to provide tools in R to build a complete analysis workflow for integrative analysis of data generated from multi-omics platform.

  • Generate a multi-omics object using MultiAssayExperiment.

  • Quality control of single-omics data.

  • Formatting, normalisation, denoising of single-omics data.

  • Separate single-omics analyses.

  • Integration of multi-omics data for combined analysis.

  • Publication quality plots and interactive analysis reports based of shinyApp.

Currently, Omix supports the integration of bulk transcriptomics and bulk proteomics.

Running Omix

The Omix pipeline requires the following input:

  • rawdata_rna: A data-frame containing raw RNA count with rownames as gene and colnames as samples.

  • rawdata_protein: A data-frame containing protein abundance with rownames as proteins and colnames as samples.

  • map_rna: A data-frame of two columns named primary and colname where primary should contain unique sample name with a link to sample metadata and colname is the column names of the rawdata_rna data-frame.

  • map_protein: A data-frame of two columns named primary and colname where primary should contain unique sample name with a link to sample metadata and colname is the column names of the rawdata_protein data-frame.

  • metadata_rna: A data-frame containing rna assay specific metadata where rownames are same as the colnames of rawdata_rna data.frame.

  • metadata_protein: A data-frame containing protein assay specific metadata where rownames are same as the colnames of rawdata_protein data.frame.

  • individual_metadata: A data-frame containing individual level metadata for both omics assays.

Call required packages for vignette:

library(Omix)
## 

First we must load the data from Omix for the vignette:

outputDir <- tempdir()
rawdata_rna_fp <- file.path(outputDir, "raw_counts_MTG_CV.rds")
rawdata_protein_fp <- file.path(outputDir, "raw_proteomics_MTG_CV.rds")
map_rna_fp <- file.path(outputDir, "map_RNA_MTG_CV.rds")
map_prot_fp <- file.path(outputDir, "map_prot_MTG_CV.rds")
metadata_rna_fp <- file.path(outputDir, "metadata_rna_MTG_CV.rds")
metadata_prot_fp <-  file.path(outputDir, "metadata_prot_MTG_CV.rds")
individual_metadata_fp <-file.path(outputDir, "metadata_map_MTG_CV_new.rds")
ctd_fp <-file.path(outputDir, "ctd.rds")
ensembl_fp  <-file.path(outputDir, "ensembl.rds")

download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/raw_counts_MTG_CV.rds",destfile=rawdata_rna_fp,headers = c(Authorization = paste("token", "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")))

download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/raw_proteomics_MTG_CV.rds",destfile=rawdata_protein_fp,headers = c(Authorization = paste("token", "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")))

download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/map_RNA_MTG_CV.rds",destfile=map_rna_fp ,headers = c(Authorization = paste("token", "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")))

download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/map_prot_MTG_CV.rds",destfile=map_prot_fp ,headers = c(Authorization = paste("token", "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")))

download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/metadata_rna_MTG_CV.rds",destfile=metadata_rna_fp ,headers = c(Authorization = paste("token", "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")))

download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/metadata_prot_MTG_CV.rds",destfile=metadata_prot_fp ,headers = c(Authorization = paste("token", "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")))

download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/data_MTG_CV/metadata_map_MTG_CV_new.rds",destfile=individual_metadata_fp ,headers = c(Authorization = paste("token", "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")))


download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/ctd-2.rds",destfile=ctd_fp ,headers = c(Authorization = paste("token", "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")))

download.file(url = "https://raw.githubusercontent.com/eleonore-schneeg/OmixData/main/ensembl_mappings_human.tsv",destfile=ensembl_fp ,headers = c(Authorization = paste("token", "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")))

rawdata_rna <- as.data.frame(readRDS(paste0(outputDir,'/raw_counts_MTG_CV.rds')))
rawdata_protein <- as.data.frame(readRDS(paste0(outputDir,'/raw_proteomics_MTG_CV.rds')))
map_rna <- as.data.frame(readRDS(paste0(outputDir,'/map_RNA_MTG_CV.rds')))
map_prot <- as.data.frame(readRDS(paste0(outputDir,'/map_prot_MTG_CV.rds')))
metadata_rna<- as.data.frame(readRDS(paste0(outputDir,'/metadata_rna_MTG_CV.rds')))
metadata_prot <-  as.data.frame(readRDS(paste0(outputDir,'/metadata_prot_MTG_CV.rds')))
individual_metadata <- as.data.frame(readRDS(paste0(outputDir,'/metadata_map_MTG_CV_new.rds')))
ctd <-  readRDS(paste0(outputDir,'/ctd.rds'))
ensembl <-read.delim(file=paste0(outputDir,'/ensembl.rds'), sep = '\t', header = TRUE)
rna_qc_data_matrix <- NULL

Sanity check

all(rownames(metadata_rna) == colnames(rawdata_rna))
## [1] TRUE
all(rownames(metadata_prot) == colnames(rawdata_protein))
## [1] TRUE

Raw rna counts

rawdata_rna is a data-frame of raw counts, with features as rows and samples as columns

print(rawdata_rna[1:5,1:5])
##                 IGF117756 IGF117759 IGF117764 IGF117772 IGF117778
## ENSG00000223972         1         1         1         0         0
## ENSG00000227232        40        30        18        15        35
## ENSG00000278267         1         0         3         4         3
## ENSG00000243485         0         0         0         0         0
## ENSG00000284332         0         0         0         0         0

Raw protein abundance

rawdata_protein is a data-frame of raw protein abundances, with features as rows and samples as columns

print(rawdata_protein[15:20,15:20])
##                20110137_MTG_P1WG4_001 20110272_MTG_P1WG7_001
## TUBAL3                      7158.6100              9201.4900
## FBLL1                         67.4334                60.1956
## CASTOR2                       17.3159                26.2499
## UNC119;UNC119B                20.3847                12.3418
## YJEFN3                             NA                23.4309
## ARHGAP32                      11.9989                17.0831
##                20129990_MTG_P1WH4_001 A277s12_MTG_P3WD1_001
## TUBAL3                     7294.23000            8982.00000
## FBLL1                        85.75520              62.94880
## CASTOR2                      15.63710              19.52160
## UNC119;UNC119B               19.64770               8.60717
## YJEFN3                             NA              25.40000
## ARHGAP32                      5.57716               9.02670
##                A272s15_MTG_P3WC8_001 A297s16_MTG_P3WD7_001
## TUBAL3                    10871.4000            8487.21000
## FBLL1                        46.9068              57.40500
## CASTOR2                           NA              22.37250
## UNC119;UNC119B                    NA              17.10210
## YJEFN3                       19.7786              16.63510
## ARHGAP32                          NA               7.95878

Maps

Maps are data frame containing two columns: primary and colname. The primary column should be the individual ID the individual metadata, and the colname the matched sample names from the raw matrices columns. If sample and individual ids are the same, maps aren’t needed (primary and colnames are the same).

print(head(map_prot))
##        primary                colname
## 1   PDC016-MTG  PDC016_MTG_P3WH10_001
## 2   PDC026-MTG   PDC026_MTG_P4WA3_001
## 3  A019/12-MTG A019s12_MTG_P2WD10_001
## 4 20130400-MTG 20130400_MTG_P1WH9_001
## 5  A127/11-MTG  A127s11_MTG_P2WH3_001
## 6  A053/11-MTG  A053s11_MTG_P2WF5_001
print(head(map_rna))
##        primary   colname
## 1  A115/17-SSC IGF117745
## 2 19950030-SSC IGF117746
## 3  A210/05-MTG IGF117747
## 4 20100742-SSC IGF117748
## 5 19920001-SSC IGF117749
## 6  A138/12-MTG IGF117750

Technical metadata

Technical metadata are data-frames contain the column colname that should match the sample names in the raw matrices, and any additional columns related to technical artefacts like batches

print(head(metadata_rna))
##              sample_id sample_name RIN Amyloid_Beta       pTau
## IGF117756 19930222-MTG   IGF117756 2.7     2.027670 6.98931000
## IGF117759 20129990-MTG   IGF117759 2.0     3.262270 2.55134000
## IGF117764  A127/11-MTG   IGF117764 4.6     0.147259 0.00247153
## IGF117772 20100742-MTG   IGF117772 7.5           NA         NA
## IGF117778 20110137-MTG   IGF117778 4.8     3.828680 2.77941000
## IGF117784 19930148-MTG   IGF117784 1.7     5.015050 2.94966000
##           pct_pTau_Positive        PHF1
## IGF117756        5.25975000 2.420650000
## IGF117759        0.37497600 1.564080000
## IGF117764        0.00876194 0.000519246
## IGF117772                NA          NA
## IGF117778        2.56623000          NA
## IGF117784        0.71418700 2.409610000
print(head(metadata_prot))
##                           sample_id            sample_name batch
## PDC016_MTG_P3WH10_001    PDC016-MTG  PDC016_MTG_P3WH10_001    P3
## PDC026_MTG_P4WA3_001     PDC026-MTG   PDC026_MTG_P4WA3_001    P4
## A019s12_MTG_P2WD10_001  A019/12-MTG A019s12_MTG_P2WD10_001    P2
## 20130400_MTG_P1WH9_001 20130400-MTG 20130400_MTG_P1WH9_001    P1
## A127s11_MTG_P2WH3_001   A127/11-MTG  A127s11_MTG_P2WH3_001    P2
## A053s11_MTG_P2WF5_001   A053/11-MTG  A053s11_MTG_P2WF5_001    P2

Individual metadata

individual_metadata should contain individual level metadata, where one column matches the primary column in maps.

print(head(individual_metadata))
##    case_id brain_region age trem2_all    BBN_ID sex diagnosis apoe_final Braak
## 1   PDC016          MTG  91        CV BBN_10099   F   Control      E3/E3     2
## 2   PDC026          MTG  80        CV BBN_10109   F   Control      E2/E3     2
## 3  A019/12          MTG  92        CV BBN_10611   F        AD      E3/E3     3
## 4 20130400          MTG  65        CV BBN_14801   F   Control      E3/E3     1
## 5  A127/11          MTG  73        CV BBN_16213   M   Control      E3/E3     0
## 6  A053/11          MTG  77        CV BBN_18399   M   Control      E3/E3     0
##   PMD    sample_id            sample_name   amyloid       pTau        PHF1 AD
## 1  22   PDC016-MTG  PDC016_MTG_P3WH10_001 0.5852850 0.00649899 0.001686760  0
## 2  23   PDC026-MTG   PDC026_MTG_P4WA3_001 0.7826840 0.92852500 0.678769000  0
## 3  30  A019/12-MTG A019s12_MTG_P2WD10_001 0.3078820 0.01312750 0.017976000  1
## 4  47 20130400-MTG 20130400_MTG_P1WH9_001        NA         NA          NA  0
## 5  23  A127/11-MTG  A127s11_MTG_P2WH3_001 0.1472590 0.00247153 0.000519246  0
## 6  11  A053/11-MTG  A053s11_MTG_P2WF5_001 0.0455756 0.00222531 0.010463800  0
##   MTG SSC
## 1   1   0
## 2   1   0
## 3   1   0
## 4   1   0
## 5   1   0
## 6   1   0

Optional inputs

Optional inputs contain additional data frame used for QC visualisation purposes

print(head(rna_qc_data_matrix))
## NULL

Step one - Generate a MultiAssayExperiment object

To run Omix, we first need to generate a multi-omics object The package currently supports transcriptomics and proteomics bulk data only.

# Convert metadata type from character to factor

individual_metadata$diagnosis <- factor(individual_metadata$diagnosis,
                                        levels = c("Control", "AD"))
individual_metadata$sex <- factor(individual_metadata$sex)

#Generate multiassay object
multiomics_object_MTG=generate_multiassay(rawdata_rna =rawdata_rna,
                                   rawdata_protein = rawdata_protein,
                                   individual_to_sample=FALSE,
                                   map_rna = map_rna,
                                   map_protein = map_prot,
                                   metadata_rna = metadata_rna,
                                   metadata_protein = metadata_prot,
                                   individual_metadata = individual_metadata,
                                   map_by_column = 'sample_id',
                                   rna_qc_data=FALSE,
                                   rna_qc_data_matrix=NULL,
                                   organism='human')
##  Ensembl ID conversion to gene symbol
##  Retrieval of gene biotype
## harmonizing input:
##   removing 58 sampleMap rows with 'colname' not in colnames of experiments
##  RNA raw data loaded
## class: SummarizedExperiment 
## dim: 58884 29 
## metadata(1): metadata
## assays(1): rna_raw
## rownames(58884): ENSG00000223972 ENSG00000227232 ... ENSG00000277475
##   ENSG00000268674
## rowData names(3): ensembl_gene_id gene_name gene_biotype
## colnames(29): IGF117756 IGF117759 ... IGF117836 IGF117837
## colData names(7): sample_id sample_name ... pct_pTau_Positive PHF1
##  Protein raw data loaded
## class: SummarizedExperiment 
## dim: 3228 39 
## metadata(1): metadata
## assays(1): protein_raw
## rownames(3228):
##   IGKV2-28;IGKV2-29;IGKV2-30;IGKV2-40;IGKV2D-26;IGKV2D-28;IGKV2D-29;IGKV2D-30;IGKV2D-40
##   IGKV3-11;IGKV3D-11 ... FAM169A SEC23IP
## rowData names(1): gene_name
## colnames(39): PDC016_MTG_P3WH10_001 PDC026_MTG_P4WA3_001 ...
##   20196919_MTG_P2WD1_001 20196928_MTG_P2WD4_001
## colData names(3): sample_id sample_name batch
##  MultiAssayExperiment object generated!

The MultiAssayExperiment object was succesfully created. The following steps will process and perform QC on each omic layers of the multiassay object.

Step two - Process transcriptomics data

multiomics_object_MTG=process_rna(multiassay=multiomics_object_MTG,
            transformation='vst',
            protein_coding=TRUE,
            min_count = 10,
            min_sample = 0.5,
            dependent =  "diagnosis",
            levels = c("Control","AD"),
            covariates=c('age','sex','PMD'),
            filter=TRUE,
            batch_correction=TRUE,
            batch=NULL,
            remove_sample_outliers= FALSE)
##   the design formula contains one or more numeric variables with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
##   the design formula contains one or more numeric variables that have mean or
##   standard deviation larger than 5 (an arbitrary threshold to trigger this message).
##   Including numeric variables with large mean can induce collinearity with the intercept.
##   Users should center and scale numeric variables in the design to improve GLM convergence.
##  NORMALISATION & TRANSFORMATION
##  GENE FILTERING
##  Keeping only protein coding genes
##  39155 / 58884 non protein coding genes were dropped
##  19729 protein coding genes kept for analysis
##  QC parameters selected require genes to have at least 50 % of samples with counts of 10 or more
##  4884 / 19729 genes were dropped
##  14845 genes kept for analysis
##  VST TRANSFORMATION
##  BATCH CORRECTION
##  Using Limma on  to remove technical artefacts, and age as biological confoundersUsing Limma on  to remove technical artefacts, and sex as biological confoundersUsing Limma on  to remove technical artefacts, and PMD as biological confounders
##  Transcriptomics data processed!
##  Processing parameters saved in metadata

Step three - Process proteomics data

multiomics_object_MTG=process_protein(
    multiassay=multiomics_object_MTG,
    filter=TRUE,
    min_sample = 0.5,
    dependent =  "diagnosis",
    levels = c("Control","AD"),
    imputation = 'minimum_value',
    remove_feature_outliers= FALSE,
    batch_correction= FALSE,
    batch="batch",
    correction_method="median_centering",
    remove_sample_outliers=FALSE,
    denoise=TRUE,
    covariates=c('PMD','sex','age'))
##  SCALING NORMALIZATION
##  FILTERING
##  322 / 3228 proteins filtered
##  2906 proteins kept for analysis
##  IMPUTATION
##  Imputation of remaining missing values based on
## 50% of minimum level of abundance for each protein
##  DENOISING BIOLOGICAL COVARIATES
##  Using Limma on PMD as biological confoundersUsing Limma on sex as biological confoundersUsing Limma on age as biological confounders

The processing parameter are stored in the object

Step Four - Single omic analyses

Transcriptomics differential expression using DEseq2

Now we run a differential expression analysis between groups of interest, where the first level, here control is the reference. The function requires to run process_rna first, with covariates of interest. These are adjusted for in the DE.

multiomics_object_MTG=rna_DE_analysis(multiassay=multiomics_object_MTG,
                    dependent='diagnosis',
                    levels=c('Control','AD'),
                    filter_protein_coding = TRUE,
                    log2FoldChange = 0.2)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing

Differential expression results for transcripts are saved in the multiomics_object_MTG@metadata$DEG slot

DT::datatable(multiomics_object_MTG@metadata$DEG$ADvsControl,
              rownames = FALSE, 
              escape = FALSE,
              options = list(pageLength = 20, 
                             scrollX=T, 
                             autoWidth = TRUE,
                             columnDefs = list(list(width = '500px', targets = 1)),
                             dom = 'Blfrtip'))

Volcano plot of differentially expressed genes is saved in the multiomics_object_MTG@metadata$DEG$plot slot

volcano_interactive(multiomics_object_MTG@metadata[["DEG"]]$ADvsControl, padj=0.05)

Proteomics differential expression using limma

Now we run a differential expression analysis between groups of interest, where the first level, here control is the reference. If the data is already denoised for covariates of interest process_rna, set covariates as `NULL``

multiomics_object_MTG=protein_DE_analysis(multiomics_object_MTG,
                         slot="protein_processed",
                         dependent='diagnosis',
                         covariates=NULL,
                         levels=c('Control','AD'),
                         log2FoldChange = 0.2)

Differential expression results for proteins are saved in the multiomics_object_MTG@metadata$DEP slot

DT::datatable(multiomics_object_MTG@metadata$DEP$ADvsControl,
              rownames = FALSE, 
              escape = FALSE,
              options = list(pageLength = 20, 
                             scrollX=T, 
                             autoWidth = TRUE,
                             columnDefs = list(list(width = '500px', targets = 1)),
                             dom = 'Blfrtip'))

Volcano plot of differentially abundant proteins is saved in the multiomics_object_MTG@metadata$DEP$plot slot

volcano_interactive(multiomics_object_MTG@metadata[["DEP"]]$ADvsControl, padj=0.05)
  • The differential analysis of transcriptomics data yielded 12 and 56 up and downregulated genes respectively (absolute Log2FoldChange >0.1 and adjusted p-value < 0.05) (Figure 3A).
  • For proteomics data, 58 in the positive direction against 23 in the negative direction (Figure 3B).

Comparing differentially expressed genes and protein

Since the magnitude of changes are not directly comparable between platforms, but their directionality are, we assessed whether significant differential features overlapped between the two platforms and whether their direction of effects were concordant or discordant.

DE_comparison=single_omic_comparisons(multiassay=multiomics_object_MTG,
                                       slot='ADvsControl',
                                       threshold = 0.05,
                                       pvalue = 'pval',
                                       filtering_options=NULL)

Differential analyses reveal partial concordance between transcriptomics and proteomics

volcano_interactive_comparison(DE_comparison$dataframe)
corr= cor(DE_comparison$dataframe$log2FoldChange.x,DE_comparison$dataframe$log2FoldChange.y)

We found a r=0.2230505 correlation between pairwise log2FoldChanges (p-value <0.05) indicating a moderate positive correlation between the mRNA and protein level changes, which is reinforced by the existence of discordant pairs.

DT::datatable(DE_comparison$dataframe,
              rownames = FALSE, 
              escape = FALSE,
              options = list(pageLength = 20, 
                             scrollX=T, 
                             autoWidth = TRUE,
                             columnDefs = list(list(width = '500px', targets = 1)),
                             dom = 'Blfrtip'))

Overall, these single platform analyses highlighted obvious differences between transcriptomics and proteomics and should be further studied to differentiate technical versus biological effects. Overall, results highlight that:

  • different platforms do not cover the same exact set of features
  • of the features that intersect, most see a significant change in one platform only
  • of the features that significantly change in both platforms, some may see discordant directionality of effects
Protein accumulation

Protein with a positive Log2FC but negative mRNA regulation may point to those accumulating in the brain as AD progresses

DE_comparison$dataframe$gene_name[which(DE_comparison$dataframe$direction=='Discordant'&DE_comparison$dataframe$log2FoldChange.y>0.2)]
##   [1] "ACAN"     "ACAT2"    "ACOT8"    "ACSL6"    "ADAM10"   "ADAR"    
##   [7] "AKR1A1"   "AKR7A2"   "ALDH18A1" "ALDH9A1"  "ALMS1"    "ANK1"    
##  [13] "ANP32A"   "AP3M1"    "APOE"     "APP"      "APRT"     "ARCN1"   
##  [19] "ARF1"     "ARF4"     "ARHGDIA"  "ARL1"     "ARL3"     "ARMC10"  
##  [25] "ARMC8"    "ASS1"     "ATP1B2"   "ATP2B3"   "ATP2B4"   "ATP8A1"  
##  [31] "ATP9A"    "BAG5"     "BANF1"    "BCL2L13"  "BLOC1S2"  "BLVRB"   
##  [37] "C1QA"     "C1QC"     "CACNA2D2" "CALR"     "CAMK2G"   "CAMSAP2" 
##  [43] "CAPNS1"   "CBR1"     "CBR3"     "CBX3"     "CCDC43"   "CCDC91"  
##  [49] "CCS"      "CD200"    "CDK14"    "CHMP2B"   "CHMP7"    "CHORDC1" 
##  [55] "CISD2"    "CKB"      "CLIC4"    "CNN3"     "CNRIP1"   "COMMD10" 
##  [61] "COMMD2"   "COPB1"    "COPG1"    "COPS6"    "CPE"      "CPNE3"   
##  [67] "CPNE6"    "CSNK1A1"  "CSTB"     "CTSB"     "DDOST"    "DHRS7"   
##  [73] "DHX15"    "DHX9"     "DMD"      "DNPEP"    "DRG1"     "DYNC1I2" 
##  [79] "EIF1"     "EIF2B3"   "EIF2B4"   "EIF3D"    "EIF3E"    "EIF4A3"  
##  [85] "EIF4G3"   "EMD"      "ENO1"     "EPM2AIP1" "ESD"      "EXOC5"   
##  [91] "F3"       "FBXL16"   "FDPS"     "FHL1"     "FIBP"     "FKBP4"   
##  [97] "FN3KRP"   "FNTA"     "FSD1L"    "GABBR1"   "GAK"      "GBA"     
## [103] "GCA"      "GGH"      "GLO1"     "GNAQ"     "GNAS"     "GPM6A"   
## [109] "GRIN1"    "GSPT1"    "GSPT2"    "GSTM5"    "GYS1"     "HACD3"   
## [115] "HARS2"    "HBB"      "HBS1L"    "HMGB1"    "HNRNPK"   "HOOK3"   
## [121] "HSDL1"    "HSPB1"    "HSPB6"    "IFT27"    "INPP1"    "INPP5A"  
## [127] "IPO9"     "ISOC1"    "ITPR1"    "KCNA2"    "KCNMA1"   "LAMTOR2" 
## [133] "LCP1"     "LGALS3"   "LIMS1"    "LIN7C"    "LMAN1"    "LSM3"    
## [139] "LTA4H"    "LUC7L2"   "LUZP1"    "LYSMD2"   "MAL2"     "MAOB"    
## [145] "MAPK1"    "MAPT"     "MARCKS"   "MAST1"    "MPC2"     "MRPS27"  
## [151] "MTCH1"    "MTCH2"    "MTX3"     "NAMPT"    "NANS"     "NDRG4"   
## [157] "NEFH"     "NF1"      "NLGN2"    "NOP56"    "NPTX1"    "NQO1"    
## [163] "NT5C1A"   "NUDCD1"   "NUDT10"   "OTUD6B"   "PAFAH1B1" "PAFAH1B2"
## [169] "PCYOX1"   "PELO"     "PFN1"     "PFN2"     "PGAM1"    "PGD"     
## [175] "PHPT1"    "PIK3R4"   "PITRM1"   "PLAA"     "PLCD1"    "PLCXD3"  
## [181] "PLD3"     "PPIA"     "PPIG"     "PPIL1"    "PPT1"     "PRAF2"   
## [187] "PREP"     "PREPL"    "PRKAG2"   "PRNP"     "PRPSAP1"  "PRUNE1"  
## [193] "PSMC4"    "PSMD10"   "PSME2"    "PTPRE"    "PYGB"     "RAB39B"  
## [199] "RALYL"    "RAN"      "RANBP9"   "RANGAP1"  "RAPGEF6"  "RBBP9"   
## [205] "RBP1"     "RCN1"     "REPS1"    "RMDN3"    "RNPS1"    "RPN1"    
## [211] "RPN2"     "RSU1"     "RYR2"     "S100A1"   "S100A16"  "SACM1L"  
## [217] "SAE1"     "SART3"    "SCAMP1"   "SCAMP5"   "SCARB2"   "SEC13"   
## [223] "SEC22B"   "SEC24C"   "SERPINH1" "SETD7"    "SIRT3"    "SKP1"    
## [229] "SLC1A3"   "SLC2A3"   "SLC3A2"   "SLC4A1"   "SLC4A4"   "SLC7A14" 
## [235] "SLC8A1"   "SMS"      "SNRNP200" "SNRNP40"  "SNRPA1"   "SNTB2"   
## [241] "SON"      "SQSTM1"   "SRI"      "SRP54"    "SRP72"    "SRSF1"   
## [247] "SRSF2"    "SRSF7"    "SSB"      "STAM2"    "SURF4"    "SV2B"    
## [253] "SYNPR"    "SYP"      "SYT5"     "TBCB"     "TMA7"     "TMEM11"  
## [259] "TMEM163"  "TMEM30A"  "TMEM65"   "TMX3"     "TOP1"     "TPP1"    
## [265] "TRA2A"    "TRAPPC10" "TSN"      "TSNAX"    "TSR2"     "TXNRD1"  
## [271] "UBA1"     "UBAC1"    "UBLCP1"   "UFC1"     "UFL1"     "VCL"     
## [277] "VPS18"    "VPS25"    "VPS29"    "VTN"      "WDR1"     "XPO7"

Step Five - Multi-omics integration using DIABLO

By default, the vertical integration is performed on processed slots.

DIABLO is a supervised multi-omics integration model that requires the input of a dependent variable. The function will perform tuning on the defined range, and number of components chosen.

multiomics_object_MTG=vertical_integration(multiomics_object_MTG,
                                  slots = c(
                                   "rna_processed",
                                   "protein_processed"
                                 ),
                                integration='DIABLO',
                                intersect_genes=FALSE,
                                ID_type='gene_name',
                                dependent='diagnosis',
                                levels= c("Control", "AD"),
                                design="cor",
                                ncomp=2,
                                range=list(mRNA=seq(10,100,by=10),
                                              proteins=seq(10,100,by=10)))
## harmonizing input:
##   removing 68 sampleMap rows not in names(experiments)
## 
## ── MODEL TUNING ──
## 
## Design matrix has changed to include Y; each block will be
##             linked to Y.
## 
## You have provided a sequence of keepX of length: 10 for block mRNA and 10 for block proteins.
## This results in 100 models being fitted for each component and each nrepeat, this may take some time to run, be patient!
## 
## You can look into the 'BPPARAM' argument to speed up computation time.
## 
## tuning component 1
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |======================================================================| 100%
## tuning component 2
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |======================================================================| 100%
## Design matrix has changed to include Y; each block will be
##             linked to Y.
##  VERTICAL INTEGRATION WITH DIABLO

Step Six - Multi-omics analyses (Supervised Example using DIABLO)

The user may want to perform the integrative downstream analyses step-by-step, or to run integrative_results_supervised, which will create a results object containing, multi-omics signatures, network visualisation, cell type and functional enrichment, as well as comparison of signatures against the OpenTargets knowledge base.

integrative_results_MTG=integrative_results_supervised(multiomics_object_MTG,
                                integration = "DIABLO",
                                component = 1,
                                correlation_threshold=0.4,
                                enrichment_method='enrichr',
                                disease_id='MONDO_0004975',
                                simplify=FALSE,
                                compare_communities=FALSE)

## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2021... Done.
##   Querying GO_Cellular_Component_2021... Done.
##   Querying GO_Biological_Process_2021... Done.
##   Querying WikiPathways_2021_Human... Done.
##   Querying Reactome_2016... Done.
##   Querying KEGG_2021_Human... Done.
##   Querying MSigDB_Hallmark_2020... Done.
##   Querying BioCarta_2016... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2021... Done.
##   Querying GO_Cellular_Component_2021... Done.
##   Querying GO_Biological_Process_2021... Done.
##   Querying WikiPathways_2021_Human... Done.
##   Querying Reactome_2016... Done.
##   Querying KEGG_2021_Human... Done.
##   Querying MSigDB_Hallmark_2020... Done.
##   Querying BioCarta_2016... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2021... Done.
##   Querying GO_Cellular_Component_2021... Done.
##   Querying GO_Biological_Process_2021... Done.
##   Querying WikiPathways_2021_Human... Done.
##   Querying Reactome_2016... Done.
##   Querying KEGG_2021_Human... Done.
##   Querying MSigDB_Hallmark_2020... Done.
##   Querying BioCarta_2016... Done.
## Parsing results... Done.
## Uploading data to Enrichr... Done.
##   Querying GO_Molecular_Function_2021... Done.
##   Querying GO_Cellular_Component_2021... Done.
##   Querying GO_Biological_Process_2021... Done.
##   Querying WikiPathways_2021_Human... Done.
##   Querying Reactome_2016... Done.
##   Querying KEGG_2021_Human... Done.
##   Querying MSigDB_Hallmark_2020... Done.
##   Querying BioCarta_2016... Done.
## Parsing results... Done.

Step seven - Exploring the integrative results object

Detrimental signature - equivalent of up-regulated in AD

integrative_results_MTG$detrimental$multiomics_signature
## $rna
## [1] "ARMC3"    "HPS4"     "CARD11"   "SLC16A12" "ZNF704"   "XPC"      "PI16"    
## 
## $protein
##  [1] "DDX39B"  "DARS1"   "COTL1"   "TPP1"    "HNRNPF"  "EPHX1"   "EEF1E1" 
##  [8] "PRDX6"   "PLCD3"   "PYGM"    "EIF4A2"  "MYH14"   "ALDH1L1" "SLC7A14"
## [15] "CAPG"    "KRT1"    "RPS6KA2" "CYB5R3"  "MAPT"    "GSTM3"   "ADH5"   
## [22] "CMBL"    "CAPN2"   "CRYL1"   "GANAB"   "NAGK"    "XPO1"    "PLEC"   
## [29] "DDAH2"   "MACF1"   "GNA11"   "ISOC1"   "EIF4A1"  "SMS"     "DST"    
## [36] "ALDH9A1" "BLVRB"   "UBA6"    "CPNE6"

Multi-omics network of detrimental features

.interactive_network(integrative_results_MTG$detrimental$network$graph, communities = FALSE)

Open targets

integrative_results_MTG$detrimental$open_targets